library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
[30m── [1mAttaching packages[22m ──────────────────────────────────────────────────────── tidyverse 1.3.0 ──[39m
[30m[32m✓[30m [34mggplot2[30m 3.3.3 [32m✓[30m [34mpurrr [30m 0.3.4
[32m✓[30m [34mtibble [30m 3.1.1 [32m✓[30m [34mdplyr [30m 1.0.5
[32m✓[30m [34mtidyr [30m 1.1.3 [32m✓[30m [34mstringr[30m 1.4.0
[32m✓[30m [34mreadr [30m 1.4.0 [32m✓[30m [34mforcats[30m 0.5.0[39m
[30m── [1mConflicts[22m ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31mx[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31mx[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(phyloseq)
library(phangorn)
Loading required package: ape
library(readr)
#library(seqinr)
library(ape)
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.5-7
Attaching package: ‘vegan’
The following objects are masked from ‘package:phangorn’:
diversity, treedist
library(RColorBrewer)
library(microbiome)
microbiome R package (microbiome.github.com)
Copyright (C) 2011-2020 Leo Lahti,
Sudarshan Shetty et al. <microbiome.github.io>
Attaching package: ‘microbiome’
The following object is masked from ‘package:vegan’:
diversity
The following object is masked from ‘package:phangorn’:
diversity
The following object is masked from ‘package:ggplot2’:
alpha
The following object is masked from ‘package:base’:
transform
library(compositions)
Welcome to compositions, a package for compositional data analysis.
Find an intro with "? compositions"
Attaching package: ‘compositions’
The following object is masked from ‘package:ape’:
balance
The following objects are masked from ‘package:stats’:
cor, cov, dist, var
The following objects are masked from ‘package:base’:
%*%, norm, scale, scale.default
library(SpiecEasi)
Attaching package: ‘SpiecEasi’
The following objects are masked from ‘package:compositions’:
alr, clr
library(otuSummary)
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:SpiecEasi’:
cor2cov, shannon
The following objects are masked from ‘package:compositions’:
ellipses, pairwisePlot
The following object is masked from ‘package:microbiome’:
alpha
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(Matrix)
Attaching package: ‘Matrix’
The following objects are masked from ‘package:SpiecEasi’:
tril, triu
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
library(igraph)
Attaching package: ‘igraph’
The following object is masked from ‘package:SpiecEasi’:
make_graph
The following object is masked from ‘package:compositions’:
normalize
The following object is masked from ‘package:microbiome’:
diversity
The following object is masked from ‘package:vegan’:
diversity
The following object is masked from ‘package:permute’:
permute
The following object is masked from ‘package:phangorn’:
diversity
The following objects are masked from ‘package:ape’:
edges, mst, ring
The following objects are masked from ‘package:dplyr’:
as_data_frame, groups, union
The following objects are masked from ‘package:purrr’:
compose, simplify
The following object is masked from ‘package:tidyr’:
crossing
The following object is masked from ‘package:tibble’:
as_data_frame
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
# Helper functions from J. Cram https://biovcnet.github.io/_pages/NetworkScience_SparCC.nb
# Identity helper: hands its argument back unchanged.
pass <- function(x) {
  x
}
# Keep the lower triangle (and diagonal) of a correlation matrix:
# every cell above the diagonal is replaced with NA.
get_lower_tri <- function(cormat) {
  above_diagonal <- upper.tri(cormat)
  cormat[above_diagonal] <- NA
  cormat
}
# Keep the upper triangle (and diagonal) of a correlation matrix:
# every cell below the diagonal is replaced with NA.
get_upper_tri <- function(cormat) {
  below_diagonal <- lower.tri(cormat)
  cormat[below_diagonal] <- NA
  cormat
}
# Reorder a correlation matrix so that similar variables sit next to each other.
#
# cormat: square, symmetric correlation matrix.
# Returns cormat with rows and columns permuted into the order produced by
# hierarchical clustering of the correlation-derived distances.
reorder_cormat <- function(cormat) {
  # Use correlation between variables as distance: r = 1 maps to 0, r = -1 to 1
  dd <- as.dist((1 - cormat) / 2)
  hc <- hclust(dd)
  # Return the value itself: ending the function on an assignment would return
  # it invisibly, so a bare call at the console would print nothing.
  cormat[hc$order, hc$order]
}
# Apply one shared clustering order to a correlation matrix and its matching
# p-value matrix. The order comes from hierarchical clustering of (1 - r) / 2.
# Returns a list with elements r (reordered correlations) and p (reordered
# p-values).
reorder_cor_and_p <- function(cormat, pmat) {
  idx <- hclust(as.dist((1 - cormat) / 2))$order
  list(
    r = cormat[idx, idx],
    p = pmat[idx, idx]
  )
}
# Custom colorblind palette, see: https://stackoverflow.com/questions/57153428/r-plot-color-combinations-that-are-colorblind-accessible
# Channels are given on the 0-255 scale.
customvermillion <- rgb(213, 94, 0, maxColorValue = 255)
custombluegreen <- rgb(0, 158, 115, maxColorValue = 255)
customblue <- rgb(0, 114, 178, maxColorValue = 255)
customskyblue <- rgb(86, 180, 233, maxColorValue = 255)
customreddishpurple <- rgb(204, 121, 167, maxColorValue = 255)
Metadata:
metadata <- read_csv("Metadata.csv")
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_double(),
`Sample Name` = [31mcol_character()[39m,
Replicate = [31mcol_character()[39m,
Type = [31mcol_character()[39m,
SizeFraction = [31mcol_character()[39m,
Season = [31mcol_character()[39m,
OxCond = [31mcol_character()[39m
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
Import SRA table and match SRA IDs with sample IDs in metadata file
SRARunTable <- read_csv("sra_data/SraRunTable.txt")
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_character(),
AvgSpotLen = [32mcol_double()[39m,
Bases = [32mcol_double()[39m,
Bytes = [32mcol_double()[39m,
ReleaseDate = [34mcol_datetime(format = "")[39m,
Depth_m = [32mcol_double()[39m,
CH4_uM = [32mcol_double()[39m,
H2S_Um = [32mcol_double()[39m,
Oxygen_uM = [32mcol_double()[39m,
Particulate_Sulfur_uM = [32mcol_double()[39m,
salinity = [32mcol_double()[39m,
Temperature_degree_C = [32mcol_double()[39m,
TZVS_uM = [32mcol_double()[39m
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
metadata <- left_join(metadata, SRARunTable, by = 'Sample Name')
DADA2 results:
# Import the count table. No rows are skipped here; the first header field is empty,
# so readr names that column 'X1' (it is turned into row names in the next step)
count_table <- read_tsv(file="dada2_export/ASVs_counts.tsv")
Missing column names filled in: 'X1' [1]
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_double(),
X1 = [31mcol_character()[39m
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
# And specify that the first column of data are rownames
count_table <- column_to_rownames(count_table, var = colnames(count_table)[1])
# Import taxonomy of ASVs
taxonomy <- read_tsv(file="dada2_export/ASVs_taxonomy.tsv")
Missing column names filled in: 'X1' [1]
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
X1 = [31mcol_character()[39m,
Kingdom = [31mcol_character()[39m,
Supergroup = [31mcol_character()[39m,
Division = [31mcol_character()[39m,
Class = [31mcol_character()[39m,
Order = [31mcol_character()[39m,
Family = [31mcol_character()[39m,
Genus = [31mcol_character()[39m,
Species = [31mcol_character()[39m
)
# And specify that the first column of data are rownames
taxonomy <- column_to_rownames(taxonomy, var = colnames(taxonomy)[1])
# Use rarecurve() from the vegan package. rarecurve() expects the dataset as a data frame, so use as.data.frame() again:
count_table_df <- as.data.frame(count_table)
# Plot the rarefaction curves. The count table has samples in columns, so transpose with t() to put samples in rows.
# NOTE: rendering all samples at once is slow; subset to 5-10 samples at a time if it takes too long.
rarecurve(t(count_table_df), step = 100, cex = 0.5, ylab = "ASVs", label = TRUE)
count_table_no_singletons <- filter(count_table,rowSums(count_table)>1)
# retains all ASVs (out of 14176)
and change sample names from NCBI ID to our internal sample IDs
# Rename the samples (columns) of count_table_no_singletons, which are currently the NCBI SRA run numbers,
# to our internal sample IDs using the SRA run table as the key
key <- SRARunTable %>% select(Run, 'Sample Name')
# Transpose so that samples become rows and can be joined against the key
x <- (t(count_table_no_singletons))
# cbind() with a character column turns the whole matrix (counts included) into character
x <- as.data.frame(cbind(x, Run = rownames(x)))
# Attach the internal sample name to each run, then transpose back to ASVs-as-rows
y <- t(left_join(x, key, by = "Run"))
colnames(y) <- y['Sample Name',]
# Drop the two helper rows so only ASV counts remain
y <- y[ !(rownames(y) %in% c('Sample Name', 'Run')), ]
# Counts are still character at this point; type_convert() re-parses them as numbers
count_table_2 <- type_convert(as.data.frame(y))
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_double()
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
This process takes a LONG time, so run it once and save the result as an .RData object. The DADA2 tools have no option to build a tree (unlike QIIME 2), but we can build one here using DECIPHER and phangorn
(Based on https://f1000research.com/articles/5-1492/v2)
Make an alignment using tools from Decipher (Note- alignment step takes several hours. Commented out for now. Only need to run once)
## import fasta
# fas <- "dada2_export/ASVs.fa"
# seqs <- readDNAStringSet(fas)
# seqs
#
# # perform the alignment
# aligned <- AlignSeqs(seqs) # automatically detects and uses all cores
#
# # view the alignment in a browser (optional)
# BrowseSeqs(aligned, highlight=0)
#
# # write out aligned sequence file
# writeXStringSet(aligned, file="ASVs.aligned.fasta")
Use phangorn package to build tree. Here we are building a maximum likelihood neighbor-joining tree. (Also takes a while to run. Comment out for now.)
# phang.align <- phyDat(as(aligned, "matrix"), type="DNA") # convert to phyDat format
# dm <- dist.ml(phang.align) # calculate pairwise distance matrix
# treeNJ <- NJ(dm) # perform neighbor-joining tree method
# fit = pml(treeNJ, data=phang.align) # compute internal max likelihood
Since the step above takes a long time, save all variables up to this point in environment as RData object
save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_tree.RData")
Re-load
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_tree.RData")
Here we will do ordinations using the phyloseq package, which first requires making phyloseq objects out of each of our input data tables (in the last tutorial, I imported the tree using phyloseq so it is already a phyloseq object)
# OTU table component: ASVs are rows, samples are columns
ASV <- otu_table(count_table_2, taxa_are_rows = TRUE)
There were 15 warnings (use warnings() to see them)
# Taxonomy component (tax_table() needs a matrix)
TAX <- tax_table(as.matrix(taxonomy))
# Sample metadata, keyed by the internal sample name
META <- sample_data(data.frame(metadata, row.names = metadata$`Sample Name`))
# Tree component; `fit` is not created in the live code above
# (presumably restored by the load() of the saved environment - TODO confirm)
TREE <- phy_tree(fit$tree)
First check that the inputs are in compatible formats by checking for ASV names with the phyloseq function, taxa_names
head(taxa_names(TAX))
[1] "ASV_1" "ASV_2" "ASV_3" "ASV_4" "ASV_5" "ASV_6"
head(taxa_names(ASV))
[1] "ASV_1" "ASV_2" "ASV_3" "ASV_4" "ASV_5" "ASV_6"
head(taxa_names(TREE))
[1] "ASV_1" "ASV_2" "ASV_3" "ASV_4" "ASV_5" "ASV_6"
And check sample names were also detected
head(sample_names(ASV))
[1] "AE3a103A" "AE3b103A" "AE1b900AM" "AE3a103B" "AE3b103B" "AE3a198B"
head(sample_names(META))
[1] "AE3a103A" "AE3b103A" "AE3a198A" "AE3b198A" "AE3a234A" "AE3b234A"
And make the phyloseq object
ps <- phyloseq(ASV, TAX, META , TREE)
Check some features of the phyloseq object
rank_names(ps)
[1] "Kingdom" "Supergroup" "Division" "Class" "Order" "Family" "Genus"
[8] "Species"
table(tax_table(ps)[, "Supergroup"], exclude = NULL)
Alveolata Amoebozoa Apusozoa Archaeplastida Excavata Hacrobia
8880 9 45 108 9 395
Opisthokonta Rhizaria Stramenopiles <NA>
768 2405 1086 471
unique(tax_table(ps)[, "Supergroup"])
Taxonomy Table: [10 taxa by 1 taxonomic ranks]:
Supergroup
ASV_1 "Alveolata"
ASV_2 "Rhizaria"
ASV_6 "Stramenopiles"
ASV_18 "Opisthokonta"
ASV_78 "Hacrobia"
ASV_148 "Archaeplastida"
ASV_193 NA
ASV_557 "Apusozoa"
ASV_1114 "Amoebozoa"
ASV_2665 "Excavata"
Filter out those ambiguous Supergroup annotations - losing 471 ASVs
ps <- subset_taxa(ps, !is.na(Supergroup) & !Supergroup %in% c("", "NA"))
table(tax_table(ps)[, "Supergroup"], exclude = NULL)
Alveolata Amoebozoa Apusozoa Archaeplastida Excavata Hacrobia
8880 9 45 108 9 395
Opisthokonta Rhizaria Stramenopiles
768 2405 1086
Check out the Division names
table(tax_table(ps)[, "Division"], exclude = NULL)
Apicomplexa Apusomonadidae Centroheliozoa Cercozoa
29 26 40 246
Chlorophyta Choanoflagellida Ciliophora Cryptophyta
64 54 407 50
Dinoflagellata Discoba Foraminifera Fungi
8330 1 2 57
Haptophyta Hilomonadea Katablepharidophyta Lobosa
215 17 2 9
Mesomycetozoa Metamonada Metazoa Ochrophyta
17 8 561 453
Opalozoa Opisthokonta_X Perkinsea Picozoa
216 14 5 61
Pseudofungi Radiolaria Rhodophyta Sagenista
72 2155 4 186
Stramenopiles_X Streptophyta Telonemia <NA>
61 38 27 278
Filter out any with “NA” as Division
ps <- subset_taxa(ps, !is.na(Division) & !Division %in% c(""))
table(tax_table(ps)[, "Division"], exclude = NULL)
Apicomplexa Apusomonadidae Centroheliozoa Cercozoa
29 26 40 246
Chlorophyta Choanoflagellida Ciliophora Cryptophyta
64 54 407 50
Dinoflagellata Discoba Foraminifera Fungi
8330 1 2 57
Haptophyta Hilomonadea Katablepharidophyta Lobosa
215 17 2 9
Mesomycetozoa Metamonada Metazoa Ochrophyta
17 8 561 453
Opalozoa Opisthokonta_X Perkinsea Picozoa
216 14 5 61
Pseudofungi Radiolaria Rhodophyta Sagenista
72 2155 4 186
Stramenopiles_X Streptophyta Telonemia
61 38 27
After the above, 13,427 ASVs remain from the original 14,177
Eliminate the libraries that didn’t have many sequences, AE3a198A, AE3b314A, AE2a200A, AE2b900AN, AE2a200B, AE2a267B, AE2a900BN
# Logical vector over samples: TRUE for every sample that is NOT in the low-depth list.
# NOTE: despite its name, taxa_to_keep flags samples, not taxa.
taxa_to_keep <- !sample_names(ps) %in% c("AE3a198A","AE3b314A","AE2a200A","AE2b900AN","AE2a200B","AE2a267B","AE2a900BN")
# Drop the flagged samples; taxa are left untouched
ps <- prune_samples(taxa_to_keep, ps)
41 samples remain and still 13,427 ASVs
Check rarefaction curve again to make sure those low-sequencing-effort samples have been removed
# The phyloseq OTU table has taxa as rows, so transpose to put samples in rows for rarecurve()
rarecurve(t(otu_table(ps)), step = 100, cex = 0.5, ylab = "ASVs", label = TRUE)
Have to do this because you may have removed the root of your tree when pruning. (I found this handy function from here, which picks the longest branch to root from.)
There were 15 warnings (use warnings() to see them)
# first define function (adapted from the link above) to find the furthest outgroup
#
# tree.unrooted: a "phylo" object with branch lengths ($edge, $edge.length,
#                $tip.label).
# Returns the label (character scalar) of the tip whose terminal branch is
# longest; stops with an error if the tree has no branch lengths.
pick_new_outgroup <- function(tree.unrooted) {
  if (is.null(tree.unrooted$edge.length)) {
    stop("tree has no branch lengths; cannot pick the longest terminal branch",
         call. = FALSE)
  }
  # Tips are numbered 1..Ntip in a phylo object, so an edge is terminal exactly
  # when its child node (second column of $edge) is a tip number.
  n_tips <- length(tree.unrooted$tip.label)
  child <- tree.unrooted$edge[, 2]
  terminal <- which(child <= n_tips)
  # Take the longest terminal branch as outgroup. The label is looked up via
  # the child node id: rows of $edge are not in tip order and include internal
  # edges, so pairing the first Ntip rows with tip.label would mislabel them.
  longest <- terminal[which.max(tree.unrooted$edge.length[terminal])]
  new.outgroup <- tree.unrooted$tip.label[child[longest]]
  return(new.outgroup)
}
# then run on my phyloseq tree
my.tree <- phy_tree(ps)
out.group <- pick_new_outgroup(my.tree)
out.group
[1] "ASV_10740"
# Then use this outgroup to root the tree
new.tree1 <- ape::root(my.tree, outgroup=out.group, resolve.root=TRUE)
phy_tree(ps) <- new.tree1
# Check if tree is binary (dichotomous not multichotomous)
is.binary.tree(phy_tree(ps))
[1] TRUE
# If false, would have to run
# new.tree2 <- ape::multi2di(new.tree1)
# phy_tree(ps) <- new.tree2
# phy_tree(ps)
Check overall how the phyla are distributed among samples. Phyloseq makes this easy
# First agglomerate the ASVs at the Division level using the phyloseq function tax_glom
DivisionGlommed <- tax_glom(ps, "Division")
# There are many divisions here, so build a custom colour palette by interpolating an existing RColorBrewer one
colourCount <- length(table(tax_table(ps)[, "Division"], exclude = NULL))
getPalette <- colorRampPalette(brewer.pal(11, "Spectral"))
DivisionPalette <- getPalette(colourCount)
# and plot
plot_bar(DivisionGlommed, x = "Sample", fill = "Division") +
  scale_fill_manual(values = DivisionPalette)
Plot compositional (relative abundances) instead of absolute abundance using microbiome::transform
ps_ra <- microbiome::transform(ps, transform = "compositional")
(otu_table(ps_ra))[1:5,1:5]
OTU Table: [5 taxa and 5 samples]
taxa are rows
AE3a103A AE3b103A AE1b900AM AE3a103B AE3b103B
ASV_1 4.046390e-04 0.000105531 2.462054e-05 0.000000e+00 2.400346e-05
ASV_2 0.000000e+00 0.000000000 3.132963e-02 0.000000e+00 5.600807e-05
ASV_3 6.674871e-03 0.014117702 2.265089e-02 3.696079e-03 1.055352e-02
ASV_4 1.244014e-03 0.001524337 1.231027e-05 4.769134e-05 6.720968e-04
ASV_5 2.675299e-05 0.000000000 0.000000e+00 7.948557e-06 1.040150e-04
# Then aglomerate the ASVs at the phylum level using the phyloseq function, tax_glom
DivisionGlommed_RA = tax_glom(ps_ra, "Division")
# and plot
Division_barplot <- plot_bar(DivisionGlommed_RA, x = "Sample", fill = "Division") +
scale_fill_manual(values = DivisionPalette) +
theme(legend.text = element_text(size = 10))
Division_barplot
# export
ggsave("Figures/Division_barplot.eps",Division_barplot, width = 15, height = 5, units = c("in"))
Lots of dinoflagellates and radiolaria. Makes sense. But the above is the distribution from all samples. Next make plots that indicate distributions across environmental gradients. Calculate averages and use bubble plots
Get average relative abundances from sample replicates
# Samples that make up each replicate group. Groups with a single sample lost
# their other replicate when the low-depth libraries were pruned.
replicate_samples <- list(
  "103A"  = c("AE3a103A", "AE3b103A"),
  "198A"  = c("AE3b198A"),                 # Sample AE3a198A was removed
  "234A"  = c("AE3a234A", "AE3b234A"),
  "295A"  = c("AE3a295A", "AE3b295A"),
  "314A"  = c("AE3a314A"),                 # Sample AE3b314A was removed
  "900AM" = c("AE3a900AM", "AE1b900AM"),
  "103B"  = c("AE3a103B", "AE3b103B"),
  "198B"  = c("AE3a198B", "AE3b198B"),
  "234B"  = c("AE3a234B", "AE3b234B"),
  "295B"  = c("AE3a295B", "AE3b295B"),
  "314B"  = c("AE3a314B", "AE3b314B"),
  "900BM" = c("AE3a900BM", "AE1b900BM"),
  "143A"  = c("AE2a143A", "AE2b143A"),
  "200A"  = c("AE2b200A"),                 # AE2a200A was removed
  "237A"  = c("AE2a237A", "AE2b237A"),
  "247A"  = c("AE2a247A", "AE2b247A"),
  "267A"  = c("AE2a267A", "AE2b267A"),
  "900AN" = c("AE2a900AN"),                # AE2b900AN was removed
  "143B"  = c("AE2a143B", "AE2b143B"),
  "200B"  = c("AE2b200B"),                 # AE2a200B was removed
  "237B"  = c("AE2a237B", "AE2b237B"),
  "247B"  = c("AE2a247B", "AE2b247B"),
  "267B"  = c("AE2b267B"),                 # AE2a267B was removed
  "900BN" = c("AE2b900BN")                 # AE2a900BN was removed
)
# Convert the relative-abundance table once, rather than once per replicate
# group inside every mutate() call.
ra_df <- data.frame(otu_table(ps_ra))
# Result keeps every per-sample column and appends one mean column per
# replicate group, in the order listed above.
otu_table_mean_ra <- ra_df
for (replicate_id in names(replicate_samples)) {
  # drop = FALSE keeps single-sample groups as a data frame for rowMeans()
  otu_table_mean_ra[[replicate_id]] <- rowMeans(
    ra_df[, replicate_samples[[replicate_id]], drop = FALSE],
    na.rm = TRUE
  )
}
otu_table_mean_ra <- otu_table_mean_ra[,unique(metadata$Replicate)]
otu_table_mean_ra
Make into new phyloseq object
# Drop the per-sample columns (sample name, Type, and everything that came from the SRA run table)
# so that unique() can collapse duplicate rows; presumably this leaves one row per Replicate - TODO confirm
metadata2 <- unique(select(metadata,!c('Sample Name',Type,colnames(SRARunTable))))
# Key the collapsed metadata by Replicate, matching the column names of otu_table_mean_ra
META2 <- sample_data(data.frame(metadata2, row.names = metadata2$Replicate))
# NOTE(review): TREE is the tree object built from fit$tree, not the re-rooted tree stored in phy_tree(ps) - confirm this is intended
ps_ra_mean <- phyloseq(otu_table(otu_table_mean_ra, taxa_are_rows = TRUE), TAX, TREE, META2)
# First aglomerate the ASVs at the phylum level using the phyloseq function, tax_glom
ps_ra_mean_division <- tax_glom(ps_ra_mean, "Division")
# and check by bar plotting
plot_bar(ps_ra_mean_division, x = "Sample", fill = "Division") +
scale_fill_manual(values = DivisionPalette)
Extract mean relative abundance, glommed by division, from the phyloseq object and pair it to taxonomic data
# Pull the Division-level mean relative abundances out of the phyloseq object
division_df <- data.frame(otu_table(ps_ra_mean_division))
# data.frame() makes column names syntactic by default (names starting with a digit get altered),
# so restore the original replicate IDs
colnames(division_df) <- colnames(otu_table(ps_ra_mean_division))
# Expose the row names as an ASV column to join on
division_df$ASV <- rownames(division_df)
# Attach the taxonomy of each row's ASV; this overwrites otu_table_mean_ra
otu_table_mean_ra <- left_join(division_df, as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Pivot longer
otu_table_mean_ra <- pivot_longer(otu_table_mean_ra, cols = unique(metadata$Replicate), names_to = "Replicate", values_to = "Mean_RA")
otu_table_mean_ra
Join metadata
otu_table_mean_ra <- left_join(otu_table_mean_ra, unique(select(metadata, c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"))), by = "Replicate")
# Replace zeroes in RA with NA (better for plotting)
otu_table_mean_ra$Mean_RA[otu_table_mean_ra$Mean_RA == 0] <- NA
otu_table_mean_ra
# reorder some factors to make them plot in the order I want
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))
# Bubble plot of mean relative abundance per Division (y) and depth (x),
# faceted by season and size fraction and coloured by redox condition.
# Divisions are ordered on the y axis by their summed mean relative abundance.
euk_divisions_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Division, Mean_RA, function(x) {
      sum(x, na.rm = TRUE)
    }),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # Only one size scale: an earlier scale_size(range = c(1, 15)) was always
  # replaced by this one and only produced a "Scale for 'size' is already
  # present" message.
  scale_size_area(breaks = c(0, .25, .5, .75, 1), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )
Scale for 'size' is already present. Adding another scale for 'size', which will replace
the existing scale.
euk_divisions_bubbleplot_color
Save figure
ggsave(filename = "Figures/euk_divisions_bubbleplot_color.eps", plot = euk_divisions_bubbleplot_color, units = c("in"), width = 10, height = 6, dpi = 300)
save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_bubbleplots.RData")
Re-load
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_bubbleplots.RData")
Import
NA
NA
Get sample names
bac_samples <- colnames(bac_counts)[2:49]
arch_samples <- colnames(arch_counts)[2:47]
bac_samples
[1] "AB2a237B" "AB2b267B" "AB2a267B" "AB2b237B" "AB3b103B" "AB2b200A" "AB2b247B"
[8] "AB3a103B" "AB3b314B" "AB2a247B" "AB3b295B" "AB2a200B" "AB3a295B" "AB3a314B"
[15] "AB2a237A" "AB3b198B" "AB2b143B" "AB2a143B" "AB3a198A" "AB3b234B" "AB3a198B"
[22] "AB2a247A" "AB2b200B" "AB3b198A" "AB3b234A" "AB3a234B" "AB3a295A" "AB3a314A"
[29] "AB2a900BN" "AB3a900B" "AB2b900BN" "AB2b267A" "AB3b295A" "AB2a900AN" "AB2b143A"
[36] "AB3b103A" "AB3a103A" "AB3a234A" "AB2b247A" "AB3b314A" "AB2b900AN" "AB2a267A"
[43] "AB1b900A" "AB2b237A" "AB2a143A" "AB1b900B" "AB2a200A" "AB3a900A"
arch_samples
[1] "AA3a314A" "AA2a237B" "AA2b237A" "AA3a900B" "AA1b900A" "AA3a103B" "AA2a247A"
[8] "AA1b900B" "AA3b198A" "AA2b237B" "AA2b143A" "AA3b103A" "AA2a143A" "AA3a103A"
[15] "AA3a198A" "AA2a200A" "AA3b198B" "AA3b234B" "AA2a200B" "AA2a143B" "AA3a234A"
[22] "AA3a295B" "AA2b247B" "AA3a234B" "AA2b247A" "AA3b234A" "AA3b314A" "AA3a314B"
[29] "AA3a198B" "AA3b295A" "AA3a295A" "AA2a237A" "AA3a900A" "AA2b200A" "AA2b267B"
[36] "AA3b314B" "AA2b143B" "AA2b200B" "AA3b103B" "AA3b295B" "AA2a267B" "AA2a247B"
[43] "AA2a900AN" "AA2b900BN" "AA2a900BN" "AA2b900AN"
Make separate taxonomy and count variables
# Archaea: the count table keeps the OTU ID plus the sample columns ...
arch_OTU <- arch_counts[, c("#OTU ID", arch_samples)]
# ... and the taxonomy table keeps everything else except the Sum column.
# all_of() marks arch_samples as an external character vector of column names,
# so it can never be confused with a column that happens to share its name.
arch_taxonomy <- arch_counts %>%
  select(-all_of(arch_samples), -Sum)
arch_OTU
arch_taxonomy
# Bacteria: same split; the free-text "Interesting close relatives" column is
# also dropped from the taxonomy table.
bac_OTU <- bac_counts[, c("#OTU ID", bac_samples)]
bac_taxonomy <- bac_counts %>%
  select(-all_of(bac_samples), -Sum, -"Interesting close relatives")
bac_OTU
bac_taxonomy
bac_OTU <- type_convert(as.data.frame(bac_OTU))
[36m──[39m [1m[1mColumn specification[1m[22m [36m──────────────────────────────────────────────────────────────────────[39m
cols(
`#OTU ID` = [31mcol_character()[39m
)
# Move the OTU IDs into the row names and drop the ID column in one step.
# column_to_rownames() always returns a data frame, whereas `[, logical]`
# would collapse to a bare vector if only one column were left.
bac_OTU <- column_to_rownames(bac_OTU, var = "#OTU ID")
bac_OTU <- otu_table(bac_OTU, taxa_are_rows = TRUE)
#
arch_OTU <- type_convert(as.data.frame(arch_OTU))
[36m──[39m [1m[1mColumn specification[1m[22m [36m──────────────────────────────────────────────────────────────────────[39m
cols(
`#OTU ID` = [31mcol_character()[39m
)
# Move the OTU IDs into the row names and drop the ID column in one step.
# column_to_rownames() always returns a data frame, whereas `[, logical]`
# would collapse to a bare vector if only one column were left.
arch_OTU <- column_to_rownames(arch_OTU, var = "#OTU ID")
arch_OTU <- otu_table(arch_OTU, taxa_are_rows = TRUE)
#
bac_TAX <- type_convert(as.data.frame(bac_taxonomy))
[36m──[39m [1m[1mColumn specification[1m[22m [36m──────────────────────────────────────────────────────────────────────[39m
cols(
`#OTU ID` = [31mcol_character()[39m,
`Refined taxonomy` = [31mcol_character()[39m,
`taxonomy-1` = [31mcol_character()[39m,
`taxonomy-2` = [31mcol_character()[39m,
`taxonomy-3` = [31mcol_character()[39m,
`taxonomy-4` = [31mcol_character()[39m,
`taxonomy-5` = [31mcol_character()[39m,
`taxonomy-6` = [31mcol_character()[39m,
`taxonomy-7` = [31mcol_character()[39m,
`taxonomy-8` = [31mcol_character()[39m
)
# Move the OTU IDs into the row names and drop the ID column in one step,
# then convert to the matrix form that tax_table() needs.
bac_TAX <- column_to_rownames(bac_TAX, var = "#OTU ID")
bac_TAX <- tax_table(as.matrix(bac_TAX))
#
arch_TAX <- type_convert(as.data.frame(arch_taxonomy))
[36m──[39m [1m[1mColumn specification[1m[22m [36m──────────────────────────────────────────────────────────────────────[39m
cols(
`#OTU ID` = [31mcol_character()[39m,
`taxonomy-1` = [31mcol_character()[39m,
`taxonomy-2` = [31mcol_character()[39m,
`taxonomy-3` = [31mcol_character()[39m,
`taxonomy-4` = [31mcol_character()[39m,
`taxonomy-5` = [31mcol_character()[39m,
`taxonomy-6` = [31mcol_character()[39m,
`taxonomy-7` = [31mcol_character()[39m
)
# Move the OTU IDs into the row names and drop the ID column in one step,
# then convert to the matrix form that tax_table() needs.
arch_TAX <- column_to_rownames(arch_TAX, var = "#OTU ID")
arch_TAX <- tax_table(as.matrix(arch_TAX))
#
# Sample metadata keyed by sample name (rebuilt here; overwrites any earlier META)
META <- sample_data(data.frame(metadata, row.names = metadata$`Sample Name`))
#
# One phyloseq object per prokaryotic domain: counts + taxonomy + metadata
ps_bac <- phyloseq(bac_OTU, bac_TAX, META)
ps_arch <- phyloseq(arch_OTU, arch_TAX, META)
Filter out the samples with low sequencing effort. These were previously identified for itags paper
# Bacteria: TRUE for every sample that is not in the low-effort list.
# NOTE: despite the "taxa_" prefix, these vectors flag samples, not taxa.
taxa_to_keep_b <- !sample_names(ps_bac) %in% c("AB3a900A","AB2a200A","AB2b267A")
ps_bac <- prune_samples(taxa_to_keep_b, ps_bac)
# Archaea: same idea with the archaeal low-effort list
taxa_to_keep_a <- !sample_names(ps_arch) %in% c("AA2b900AN","AA2a247B","AA2a900BN","AA2b900BN")
ps_arch <- prune_samples(taxa_to_keep_a, ps_arch)
First calculate relative abundance of bac and arch OTU tables
ps_bac_ra <- microbiome::transform(ps_bac, transform = "compositional")
(otu_table(ps_bac_ra))[1:5,1:5]
OTU Table: [5 taxa and 5 samples]
taxa are rows
AB2a237B AB2b267B AB2a267B AB2b237B AB3b103B
denovo231149 0.2125960670 0.5369359318 0.3471806593 0.2100273349 7.856409e-05
denovo348086 0.0014568676 0.0138326887 0.0023327600 0.0012899324 6.043392e-06
denovo302903 0.0001624382 0.0034240801 0.0009700197 0.0001292371 3.021696e-06
denovo104772 0.0310612290 0.0005181989 0.0143657171 0.0281322315 2.243911e-02
denovo309274 0.0620260104 0.0440345135 0.0583425623 0.0681006294 1.869221e-02
ps_arch_ra <- microbiome::transform(ps_arch, transform = "compositional")
(otu_table(ps_arch_ra))[1:5,1:5]
OTU Table: [5 taxa and 5 samples]
taxa are rows
AA3a314A AA2a237B AA2b237A AA3a900B AA1b900A
denovo180502 0.181534684 0.08790871 0.21422859 0.0028892496 0.032998864
denovo80843 0.129102961 0.07914636 0.05450465 0.0043043922 0.029778944
denovo217943 0.025004870 0.21867787 0.16709842 0.0009620487 0.021373974
denovo94410 0.009662359 0.13278112 0.10651172 0.0007448119 0.008834576
denovo199225 0.019405384 0.02748757 0.10588944 0.0008813608 0.009570436
Remove rows of glommed taxa from the full dataframe if their sum across all samples doesn’t exceed 5% (RA > 0.05)
# Bacteria
# Per-OTU relative abundance summed over all samples
x <- taxa_sums(ps_bac_ra)
# keepTaxa <- base::which(x > .05)
# Logical vector (one entry per OTU): TRUE when the summed relative abundance exceeds 0.05
keepTaxa <- x>.05 # prune_taxa require a logical not a list of IDs. compare to keepTaxa above to check
ps_bac_ra_pruned <- prune_taxa(keepTaxa, ps_bac_ra)
# The same mask is applied to the raw-count object
# (assumes ps_bac lists its taxa in the same order as ps_bac_ra - TODO confirm)
ps_bac_pruned <- prune_taxa(keepTaxa, ps_bac)
ps_bac_ra_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 124 taxa and 45 samples ]
sample_data() Sample Data: [ 45 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 124 taxa by 10 taxonomic ranks ]
ps_bac_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 124 taxa and 45 samples ]
sample_data() Sample Data: [ 45 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 124 taxa by 10 taxonomic ranks ]
# Archaea
# Per-OTU relative abundance summed over all samples (x and keepTaxa are reused from the previous domain)
x <- taxa_sums(ps_arch_ra)
# keepTaxa <- base::which(x > .05)
# Logical vector (one entry per OTU): TRUE when the summed relative abundance exceeds 0.05
keepTaxa <- x>.05 # prune_taxa require a logical not a list of IDs. compare to keepTaxa above to check
ps_arch_ra_pruned <- prune_taxa(keepTaxa, ps_arch_ra)
# The same mask is applied to the raw-count object
# (assumes ps_arch lists its taxa in the same order as ps_arch_ra - TODO confirm)
ps_arch_pruned <- prune_taxa(keepTaxa, ps_arch)
ps_arch_ra_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 52 taxa and 42 samples ]
sample_data() Sample Data: [ 42 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 52 taxa by 8 taxonomic ranks ]
ps_arch_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 52 taxa and 42 samples ]
sample_data() Sample Data: [ 42 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 52 taxa by 8 taxonomic ranks ]
# Eukaryotes
# Per-ASV relative abundance summed over all samples (x and keepTaxa are reused from the previous domain)
x <- taxa_sums(ps_ra)
# keepTaxa <- base::which(x > .05)
# Logical vector (one entry per ASV): TRUE when the summed relative abundance exceeds 0.05
keepTaxa <- x>.05 # prune_taxa require a logical not a list of IDs. compare to keepTaxa above to check
ps_euk_ra_pruned <- prune_taxa(keepTaxa, ps_ra)
# The same mask is applied to the raw-count object
# (assumes ps lists its taxa in the same order as ps_ra - TODO confirm)
ps_euk_pruned <- prune_taxa(keepTaxa, ps)
ps_euk_ra_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 123 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 123 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 123 tips and 122 internal nodes ]
ps_euk_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 123 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 123 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 123 tips and 122 internal nodes ]
Trimmed to 124 bacteria OTUs, 52 archaea OTUs, and 123 eukaryotic ASVs (299 total). Proceed with this dataset of the most abundant OTUs for correlations and network analyses…
To do the multi-domain analysis, the sample names from each phyloseq object must match. These currently have “B” for bacteria, A, E etc. Remove this letter from sample names so that “AE2a247B”, “AA2a247B”, “AB2a247B” all become just “Type” from the metadata sheet [IntNov1FL in this case- for Interface, November, rep 1, free-living].
Import my SampleKey
# Load the key that links each sample "Type" to its per-domain sample IDs
# (columns: Type, SampleID_bac, SampleID_arch, SampleID_euk)
samplekey <- read_csv("SampleKey.csv")
[36m──[39m [1m[1mColumn specification[1m[22m [36m──────────────────────────────────────────────────────────────────────[39m
cols(
Type = [31mcol_character()[39m,
SampleID_bac = [31mcol_character()[39m,
SampleID_arch = [31mcol_character()[39m,
SampleID_euk = [31mcol_character()[39m
)
Change the sample names in the otu tables to sample “Type”
# Rename the samples of every pruned phyloseq object to the shared "Type" label.
# For each domain:
#   1. check that the count and relative-abundance objects list their samples in
#      the same order, because one vector of new names is applied to both;
#   2. look up every sample ID in the key with match(), which drops key rows for
#      libraries that are missing and orders the rest like the phyloseq samples;
#   3. stop if a sample has no Type in the key (it would otherwise be mislabelled).

# Archaea
stopifnot(identical(sample_names(ps_arch_pruned), sample_names(ps_arch_ra_pruned)))
samplekey_A <- samplekey[match(colnames(otu_table(ps_arch_ra_pruned)), samplekey$SampleID_arch), ]
stopifnot(!anyNA(samplekey_A$Type))
# replace sample names of the relative-abundance and the count object
sample_names(ps_arch_ra_pruned) <- samplekey_A$Type
sample_names(ps_arch_pruned) <- samplekey_A$Type

# Bacteria
stopifnot(identical(sample_names(ps_bac_pruned), sample_names(ps_bac_ra_pruned)))
samplekey_B <- samplekey[match(colnames(otu_table(ps_bac_ra_pruned)), samplekey$SampleID_bac), ]
stopifnot(!anyNA(samplekey_B$Type))
sample_names(ps_bac_ra_pruned) <- samplekey_B$Type
sample_names(ps_bac_pruned) <- samplekey_B$Type

# Eukaryotes
stopifnot(identical(sample_names(ps_euk_pruned), sample_names(ps_euk_ra_pruned)))
samplekey_E <- samplekey[match(colnames(otu_table(ps_euk_ra_pruned)), samplekey$SampleID_euk), ]
stopifnot(!anyNA(samplekey_E$Type))
sample_names(ps_euk_ra_pruned) <- samplekey_E$Type
sample_names(ps_euk_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three pruned count tables (bacteria, archaea, eukaryotes, in that
# order); bind_rows() lines the columns up by sample Type and fills samples that
# a domain lacks with NA
alldomains_df <- list(ps_bac_pruned, ps_arch_pruned, ps_euk_pruned) %>%
  lapply(function(domain_ps) data.frame(otu_table(domain_ps))) %>%
  bind_rows()
alldomains_df
Change row names from “denovoXXX” to meaningful names
# Build `alldomains_df_full`: the stacked count table with row names that carry
# the taxon ID plus a few taxonomic ranks.
# The taxon ID becomes a regular column so the taxonomy tables can be joined on it.
alldomains_df_full <- cbind(ID = rownames(alldomains_df), alldomains_df)
# The domains were stacked as bacteria, archaea, eukaryotes, so each one is a
# consecutive run of rows. seq_len() keeps a slice empty when a domain has zero
# taxa, whereas 1:0 would count backwards and pick the wrong rows.
# start with only first rows, which are bacteria. make one column of meaningful labels
temp1 <- left_join(
  alldomains_df_full[seq_len(nrow(otu_table(ps_bac_pruned))), ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again; all_of() marks the names as an
# external character vector
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))
# next rows are the archaea
temp2 <- left_join(
  alldomains_df_full[nrow(otu_table(ps_bac_pruned)) + seq_len(nrow(otu_table(ps_arch_pruned))), ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))
# last rows are eukarya; their taxonomy table keeps the ASV IDs as row names, so
# copy them into a column to join on
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(
  alldomains_df_full[
    nrow(otu_table(ps_bac_pruned)) + nrow(otu_table(ps_arch_pruned)) +
      seq_len(nrow(otu_table(ps_euk_pruned))),
  ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))
# combine back all 3 domains, with new names as row names in a dataframe
alldomains_df_full <- rbind(temp1, temp2, temp3)
alldomains_df_full <- data.frame(alldomains_df_full)
rownames(alldomains_df_full) <- alldomains_df_full$New_ID
alldomains_df_full <- select(alldomains_df_full, -all_of(c("ID", "New_ID")))
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that hold a value for every taxon
alldomains_df_full <- alldomains_df_full[, !vapply(alldomains_df_full, anyNA, logical(1)), drop = FALSE]
# Peek at the first 5 samples x 5 taxa of the transposed table
t(alldomains_df_full)[1:5, 1:5]
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales
SubOxNov1FL 41881
AnoxNov2FL 216557
SubOxNov2FL 86132
OxicMay2FL 26
MicroOxNov2PA 7356
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A)
SubOxNov1FL 287
AnoxNov2FL 5579
SubOxNov2FL 529
OxicMay2FL 2
MicroOxNov2PA 123
denovo302903 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A)
SubOxNov1FL 32
AnoxNov2FL 1381
SubOxNov2FL 53
OxicMay2FL 1
MicroOxNov2PA 37
denovo104772 Proteobacteria Alphaproteobacteria SAR11_clade
SubOxNov1FL 6119
AnoxNov2FL 209
SubOxNov2FL 11537
OxicMay2FL 7426
MicroOxNov2PA 20010
denovo309274 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A)
SubOxNov1FL 12219
AnoxNov2FL 17760
SubOxNov2FL 27928
OxicMay2FL 6186
MicroOxNov2PA 1362
# Same filter for the table that still has the plain taxon IDs as row names:
# drop every sample column containing an NA
alldomains_df <- alldomains_df[, !vapply(alldomains_df, anyNA, logical(1)), drop = FALSE]
# Peek at the first 5 samples x 5 taxa of the transposed table
t(alldomains_df)[1:5, 1:5]
denovo231149 denovo348086 denovo302903 denovo104772 denovo309274
SubOxNov1FL 41881 287 32 6119 12219
AnoxNov2FL 216557 5579 1381 209 17760
SubOxNov2FL 86132 529 53 11537 27928
OxicMay2FL 26 2 1 7426 6186
MicroOxNov2PA 7356 123 37 20010 1362
36 samples remain for correlation
Similarly, make pruned datasets of the most abundant OTUs/ASVs in the oxycline, anoxic, and euxinic samples as separate datasets
Pull out samples and taxa from each redox regime
# For each domain: collect the names of the oxycline samples (OxCond ==
# "Oxycline" in the metadata) that exist in that domain's phyloseq object, then
# keep only those samples in both the base object and its `_ra` counterpart.

# Bacteria
oxyclinetypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "Oxycline") %>%
  pull(`Sample Name`) %>%
  unique()
ps_bac_oxycline <- prune_samples(oxyclinetypes_bac, ps_bac)
ps_bac_ra_oxycline <- prune_samples(oxyclinetypes_bac, ps_bac_ra)

# Archaea
oxyclinetypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "Oxycline") %>%
  pull(`Sample Name`) %>%
  unique()
ps_arch_oxycline <- prune_samples(oxyclinetypes_arch, ps_arch)
ps_arch_ra_oxycline <- prune_samples(oxyclinetypes_arch, ps_arch_ra)

# Eukaryotes (held in `ps` / `ps_ra`)
oxyclinetypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "Oxycline") %>%
  pull(`Sample Name`) %>%
  unique()
ps_euk_oxycline <- prune_samples(oxyclinetypes_euk, ps)
ps_euk_ra_oxycline <- prune_samples(oxyclinetypes_euk, ps_ra)
Filter out low abundance taxa from the oxycline samples. Use 5% as cutoff
# Bacteria
# Abundance of each taxon summed over the oxycline samples
x <- taxa_sums(ps_bac_ra_oxycline)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_bac_oxycline_pruned <- prune_taxa(keepTaxa, ps_bac_oxycline)
ps_bac_ra_oxycline_pruned <- prune_taxa(keepTaxa, ps_bac_ra_oxycline)
ps_bac_ra_oxycline_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 79 taxa and 23 samples ]
sample_data() Sample Data: [ 23 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 79 taxa by 10 taxonomic ranks ]
ps_bac_oxycline_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 79 taxa and 23 samples ]
sample_data() Sample Data: [ 23 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 79 taxa by 10 taxonomic ranks ]
# Archaea
# Abundance of each taxon summed over the oxycline samples
x <- taxa_sums(ps_arch_ra_oxycline)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_arch_oxycline_pruned <- prune_taxa(keepTaxa, ps_arch_oxycline)
ps_arch_ra_oxycline_pruned <- prune_taxa(keepTaxa, ps_arch_ra_oxycline)
ps_arch_ra_oxycline_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 36 taxa and 24 samples ]
sample_data() Sample Data: [ 24 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 36 taxa by 8 taxonomic ranks ]
ps_arch_oxycline_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 36 taxa and 24 samples ]
sample_data() Sample Data: [ 24 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 36 taxa by 8 taxonomic ranks ]
# Eukaryotes
# Abundance of each taxon summed over the oxycline samples
x <- taxa_sums(ps_euk_ra_oxycline)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_euk_oxycline_pruned <- prune_taxa(keepTaxa, ps_euk_oxycline)
ps_euk_ra_oxycline_pruned <- prune_taxa(keepTaxa, ps_euk_ra_oxycline)
ps_euk_ra_oxycline_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 76 taxa and 21 samples ]
sample_data() Sample Data: [ 21 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 76 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 76 tips and 75 internal nodes ]
ps_euk_oxycline_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 76 taxa and 21 samples ]
sample_data() Sample Data: [ 21 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 76 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 76 tips and 75 internal nodes ]
79 bacteria, 36 archaea, 76 eukaryota remain
Change the sample names in the otu tables to “Type”
# Rename the oxycline samples of every domain to the shared "Type" label.
# For each domain:
#   1. check that the count and relative-abundance objects list their samples in
#      the same order, because one vector of new names is applied to both;
#   2. look up every sample ID in the key with match(), which drops key rows for
#      samples that are absent and orders the rest like the phyloseq samples;
#   3. stop if a sample has no Type in the key (it would otherwise be mislabelled).

# Archaea
stopifnot(identical(sample_names(ps_arch_oxycline_pruned), sample_names(ps_arch_ra_oxycline_pruned)))
samplekey_A <- samplekey[match(colnames(otu_table(ps_arch_ra_oxycline_pruned)), samplekey$SampleID_arch), ]
stopifnot(!anyNA(samplekey_A$Type))
# replace sample names of the relative-abundance and the count object
sample_names(ps_arch_ra_oxycline_pruned) <- samplekey_A$Type
sample_names(ps_arch_oxycline_pruned) <- samplekey_A$Type

# Bacteria
stopifnot(identical(sample_names(ps_bac_oxycline_pruned), sample_names(ps_bac_ra_oxycline_pruned)))
samplekey_B <- samplekey[match(colnames(otu_table(ps_bac_ra_oxycline_pruned)), samplekey$SampleID_bac), ]
stopifnot(!anyNA(samplekey_B$Type))
sample_names(ps_bac_ra_oxycline_pruned) <- samplekey_B$Type
sample_names(ps_bac_oxycline_pruned) <- samplekey_B$Type

# Eukaryotes
stopifnot(identical(sample_names(ps_euk_oxycline_pruned), sample_names(ps_euk_ra_oxycline_pruned)))
samplekey_E <- samplekey[match(colnames(otu_table(ps_euk_ra_oxycline_pruned)), samplekey$SampleID_euk), ]
stopifnot(!anyNA(samplekey_E$Type))
sample_names(ps_euk_ra_oxycline_pruned) <- samplekey_E$Type
sample_names(ps_euk_oxycline_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three oxycline count tables (bacteria, archaea, eukaryotes, in that
# order); bind_rows() lines the columns up by sample Type and fills samples that
# a domain lacks with NA
alldomains_df_oxycline <- list(ps_bac_oxycline_pruned, ps_arch_oxycline_pruned, ps_euk_oxycline_pruned) %>%
  lapply(function(domain_ps) data.frame(otu_table(domain_ps))) %>%
  bind_rows()
alldomains_df_oxycline
Change row names from “denovoXXX” to meaningful names
# Replace the row names of the stacked oxycline table with labels that carry the
# taxon ID plus a few taxonomic ranks.
# The taxon ID becomes a regular column so the taxonomy tables can be joined on it.
alldomains_df_oxycline <- cbind(ID = rownames(alldomains_df_oxycline), alldomains_df_oxycline)
# The domains were stacked as bacteria, archaea, eukaryotes, so each one is a
# consecutive run of rows. seq_len() keeps a slice empty when a domain has zero
# taxa, whereas 1:0 would count backwards and pick the wrong rows.
# start with only first rows, which are bacteria. make one column of meaningful labels
temp1 <- left_join(
  alldomains_df_oxycline[seq_len(nrow(otu_table(ps_bac_oxycline_pruned))), ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again; all_of() marks the names as an
# external character vector
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))
# next rows are the archaea
temp2 <- left_join(
  alldomains_df_oxycline[
    nrow(otu_table(ps_bac_oxycline_pruned)) + seq_len(nrow(otu_table(ps_arch_oxycline_pruned))),
  ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))
# last rows are eukarya; their taxonomy table keeps the ASV IDs as row names, so
# copy them into a column to join on
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(
  alldomains_df_oxycline[
    nrow(otu_table(ps_bac_oxycline_pruned)) + nrow(otu_table(ps_arch_oxycline_pruned)) +
      seq_len(nrow(otu_table(ps_euk_oxycline_pruned))),
  ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))
# combine back all 3 domains, with new names as row names in a dataframe
alldomains_df_oxycline <- rbind(temp1, temp2, temp3)
alldomains_df_oxycline <- data.frame(alldomains_df_oxycline)
rownames(alldomains_df_oxycline) <- alldomains_df_oxycline$New_ID
alldomains_df_oxycline <- select(alldomains_df_oxycline, -all_of(c("ID", "New_ID")))
alldomains_df_oxycline
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that hold a value for every taxon
alldomains_df_oxycline <- alldomains_df_oxycline[, !vapply(alldomains_df_oxycline, anyNA, logical(1)), drop = FALSE]
alldomains_df_oxycline
21 samples remain for correlation
Pull out samples from shallow anoxic regime
# For each domain: collect the names of the shallow-anoxic samples (OxCond ==
# "ShallowAnoxic" in the metadata) that exist in that domain's phyloseq object,
# then keep only those samples in both the base object and its `_ra` counterpart.

# Bacteria
anoxictypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "ShallowAnoxic") %>%
  pull(`Sample Name`) %>%
  unique()
ps_bac_anoxic <- prune_samples(anoxictypes_bac, ps_bac)
ps_bac_ra_anoxic <- prune_samples(anoxictypes_bac, ps_bac_ra)

# Archaea
anoxictypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "ShallowAnoxic") %>%
  pull(`Sample Name`) %>%
  unique()
ps_arch_anoxic <- prune_samples(anoxictypes_arch, ps_arch)
ps_arch_ra_anoxic <- prune_samples(anoxictypes_arch, ps_arch_ra)

# Eukaryotes (held in `ps` / `ps_ra`)
anoxictypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "ShallowAnoxic") %>%
  pull(`Sample Name`) %>%
  unique()
ps_euk_anoxic <- prune_samples(anoxictypes_euk, ps)
ps_euk_ra_anoxic <- prune_samples(anoxictypes_euk, ps_ra)
Filter out low abundance taxa from the shallow anoxic samples. Use 5% as cutoff
# Bacteria
# Abundance of each taxon summed over the shallow-anoxic samples
x <- taxa_sums(ps_bac_ra_anoxic)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_bac_anoxic_pruned <- prune_taxa(keepTaxa, ps_bac_anoxic)
ps_bac_ra_anoxic_pruned <- prune_taxa(keepTaxa, ps_bac_ra_anoxic)
ps_bac_ra_anoxic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 32 taxa and 15 samples ]
sample_data() Sample Data: [ 15 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 32 taxa by 10 taxonomic ranks ]
ps_bac_anoxic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 32 taxa and 15 samples ]
sample_data() Sample Data: [ 15 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 32 taxa by 10 taxonomic ranks ]
# Archaea
# Abundance of each taxon summed over the shallow-anoxic samples
x <- taxa_sums(ps_arch_ra_anoxic)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_arch_anoxic_pruned <- prune_taxa(keepTaxa, ps_arch_anoxic)
ps_arch_ra_anoxic_pruned <- prune_taxa(keepTaxa, ps_arch_ra_anoxic)
ps_arch_ra_anoxic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 19 taxa and 13 samples ]
sample_data() Sample Data: [ 13 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 19 taxa by 8 taxonomic ranks ]
ps_arch_anoxic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 19 taxa and 13 samples ]
sample_data() Sample Data: [ 13 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 19 taxa by 8 taxonomic ranks ]
# Eukaryotes
# Abundance of each taxon summed over the shallow-anoxic samples
x <- taxa_sums(ps_euk_ra_anoxic)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_euk_anoxic_pruned <- prune_taxa(keepTaxa, ps_euk_anoxic)
ps_euk_ra_anoxic_pruned <- prune_taxa(keepTaxa, ps_euk_ra_anoxic)
ps_euk_ra_anoxic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 37 taxa and 14 samples ]
sample_data() Sample Data: [ 14 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 37 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 37 tips and 36 internal nodes ]
ps_euk_anoxic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 37 taxa and 14 samples ]
sample_data() Sample Data: [ 14 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 37 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 37 tips and 36 internal nodes ]
32 bacteria, 19 archaea, 37 eukaryota remain
Change the sample names in the otu tables to “Type”
# Rename the shallow-anoxic samples of every domain to the shared "Type" label.
# For each domain:
#   1. check that the count and relative-abundance objects list their samples in
#      the same order, because one vector of new names is applied to both;
#   2. look up every sample ID in the key with match(), which drops key rows for
#      samples that are absent and orders the rest like the phyloseq samples;
#   3. stop if a sample has no Type in the key (it would otherwise be mislabelled).

# Archaea
stopifnot(identical(sample_names(ps_arch_anoxic_pruned), sample_names(ps_arch_ra_anoxic_pruned)))
samplekey_A <- samplekey[match(colnames(otu_table(ps_arch_ra_anoxic_pruned)), samplekey$SampleID_arch), ]
stopifnot(!anyNA(samplekey_A$Type))
# replace sample names of the relative-abundance and the count object
sample_names(ps_arch_ra_anoxic_pruned) <- samplekey_A$Type
sample_names(ps_arch_anoxic_pruned) <- samplekey_A$Type

# Bacteria
stopifnot(identical(sample_names(ps_bac_anoxic_pruned), sample_names(ps_bac_ra_anoxic_pruned)))
samplekey_B <- samplekey[match(colnames(otu_table(ps_bac_ra_anoxic_pruned)), samplekey$SampleID_bac), ]
stopifnot(!anyNA(samplekey_B$Type))
sample_names(ps_bac_ra_anoxic_pruned) <- samplekey_B$Type
sample_names(ps_bac_anoxic_pruned) <- samplekey_B$Type

# Eukaryotes
stopifnot(identical(sample_names(ps_euk_anoxic_pruned), sample_names(ps_euk_ra_anoxic_pruned)))
samplekey_E <- samplekey[match(colnames(otu_table(ps_euk_ra_anoxic_pruned)), samplekey$SampleID_euk), ]
stopifnot(!anyNA(samplekey_E$Type))
sample_names(ps_euk_ra_anoxic_pruned) <- samplekey_E$Type
sample_names(ps_euk_anoxic_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three shallow-anoxic count tables (bacteria, archaea, eukaryotes, in
# that order); bind_rows() lines the columns up by sample Type and fills samples
# that a domain lacks with NA
alldomains_df_anoxic <- list(ps_bac_anoxic_pruned, ps_arch_anoxic_pruned, ps_euk_anoxic_pruned) %>%
  lapply(function(domain_ps) data.frame(otu_table(domain_ps))) %>%
  bind_rows()
alldomains_df_anoxic
Change row names from “denovoXXX” to meaningful names
# Replace the row names of the stacked shallow-anoxic table with labels that
# carry the taxon ID plus a few taxonomic ranks.
# The taxon ID becomes a regular column so the taxonomy tables can be joined on it.
alldomains_df_anoxic <- cbind(ID = rownames(alldomains_df_anoxic), alldomains_df_anoxic)
# The domains were stacked as bacteria, archaea, eukaryotes, so each one is a
# consecutive run of rows. seq_len() keeps a slice empty when a domain has zero
# taxa, whereas 1:0 would count backwards and pick the wrong rows.
# start with only first rows, which are bacteria. make one column of meaningful labels
temp1 <- left_join(
  alldomains_df_anoxic[seq_len(nrow(otu_table(ps_bac_anoxic_pruned))), ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again; all_of() marks the names as an
# external character vector
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))
# next rows are the archaea
temp2 <- left_join(
  alldomains_df_anoxic[
    nrow(otu_table(ps_bac_anoxic_pruned)) + seq_len(nrow(otu_table(ps_arch_anoxic_pruned))),
  ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))
# last rows are eukarya; their taxonomy table keeps the ASV IDs as row names, so
# copy them into a column to join on
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(
  alldomains_df_anoxic[
    nrow(otu_table(ps_bac_anoxic_pruned)) + nrow(otu_table(ps_arch_anoxic_pruned)) +
      seq_len(nrow(otu_table(ps_euk_anoxic_pruned))),
  ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))
# combine back all 3 domains, with new names as row names in a dataframe
alldomains_df_anoxic <- rbind(temp1, temp2, temp3)
alldomains_df_anoxic <- data.frame(alldomains_df_anoxic)
rownames(alldomains_df_anoxic) <- alldomains_df_anoxic$New_ID
alldomains_df_anoxic <- select(alldomains_df_anoxic, -all_of(c("ID", "New_ID")))
alldomains_df_anoxic
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that hold a value for every taxon
alldomains_df_anoxic <- alldomains_df_anoxic[, !vapply(alldomains_df_anoxic, anyNA, logical(1)), drop = FALSE]
alldomains_df_anoxic
11 samples remain for correlation
Pull out samples from euxinic regime
# For each domain: collect the names of the euxinic samples (OxCond ==
# "Euxinic" in the metadata) that exist in that domain's phyloseq object, then
# keep only those samples in both the base object and its `_ra` counterpart.

# Bacteria
euxinictypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "Euxinic") %>%
  pull(`Sample Name`) %>%
  unique()
ps_bac_euxinic <- prune_samples(euxinictypes_bac, ps_bac)
ps_bac_ra_euxinic <- prune_samples(euxinictypes_bac, ps_bac_ra)

# Archaea
euxinictypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "Euxinic") %>%
  pull(`Sample Name`) %>%
  unique()
ps_arch_euxinic <- prune_samples(euxinictypes_arch, ps_arch)
ps_arch_ra_euxinic <- prune_samples(euxinictypes_arch, ps_arch_ra)

# Eukaryotes (held in `ps` / `ps_ra`)
euxinictypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "Euxinic") %>%
  pull(`Sample Name`) %>%
  unique()
ps_euk_euxinic <- prune_samples(euxinictypes_euk, ps)
ps_euk_ra_euxinic <- prune_samples(euxinictypes_euk, ps_ra)
Filter out low abundance taxa from the euxinic samples. Use 5% as cutoff
# Bacteria
# Abundance of each taxon summed over the euxinic samples
x <- taxa_sums(ps_bac_ra_euxinic)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_bac_euxinic_pruned <- prune_taxa(keepTaxa, ps_bac_euxinic)
ps_bac_ra_euxinic_pruned <- prune_taxa(keepTaxa, ps_bac_ra_euxinic)
ps_bac_ra_euxinic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 16 taxa and 7 samples ]
sample_data() Sample Data: [ 7 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 16 taxa by 10 taxonomic ranks ]
ps_bac_euxinic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 16 taxa and 7 samples ]
sample_data() Sample Data: [ 7 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 16 taxa by 10 taxonomic ranks ]
# Archaea
# Abundance of each taxon summed over the euxinic samples
x <- taxa_sums(ps_arch_ra_euxinic)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_arch_euxinic_pruned <- prune_taxa(keepTaxa, ps_arch_euxinic)
ps_arch_ra_euxinic_pruned <- prune_taxa(keepTaxa, ps_arch_ra_euxinic)
ps_arch_ra_euxinic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 16 taxa and 5 samples ]
sample_data() Sample Data: [ 5 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 16 taxa by 8 taxonomic ranks ]
ps_arch_euxinic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 16 taxa and 5 samples ]
sample_data() Sample Data: [ 5 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 16 taxa by 8 taxonomic ranks ]
# Eukaryotes
# Abundance of each taxon summed over the euxinic samples
x <- taxa_sums(ps_euk_ra_euxinic)
# TRUE/FALSE mask with one entry per taxon, as prune_taxa() is used here.
# NOTE(review): the mask is also applied to the non-`_ra` object, which assumes
# both objects list their taxa in the same order - confirm.
keepTaxa <- 0.05 < x
ps_euk_euxinic_pruned <- prune_taxa(keepTaxa, ps_euk_euxinic)
ps_euk_ra_euxinic_pruned <- prune_taxa(keepTaxa, ps_euk_ra_euxinic)
ps_euk_ra_euxinic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 20 taxa and 6 samples ]
sample_data() Sample Data: [ 6 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 20 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 20 tips and 19 internal nodes ]
ps_euk_euxinic_pruned
phyloseq-class experiment-level object
otu_table() OTU Table: [ 20 taxa and 6 samples ]
sample_data() Sample Data: [ 6 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 20 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 20 tips and 19 internal nodes ]
16 bacteria, 16 archaea, 20 eukaryota remain
Change the sample names in the otu tables to “Type”
# Rename the euxinic samples of every domain to the shared "Type" label.
# For each domain:
#   1. check that the count and relative-abundance objects list their samples in
#      the same order, because one vector of new names is applied to both;
#   2. look up every sample ID in the key with match(), which drops key rows for
#      samples that are absent and orders the rest like the phyloseq samples;
#   3. stop if a sample has no Type in the key (it would otherwise be mislabelled).

# Archaea
stopifnot(identical(sample_names(ps_arch_euxinic_pruned), sample_names(ps_arch_ra_euxinic_pruned)))
samplekey_A <- samplekey[match(colnames(otu_table(ps_arch_ra_euxinic_pruned)), samplekey$SampleID_arch), ]
stopifnot(!anyNA(samplekey_A$Type))
# replace sample names of the relative-abundance and the count object
sample_names(ps_arch_ra_euxinic_pruned) <- samplekey_A$Type
sample_names(ps_arch_euxinic_pruned) <- samplekey_A$Type

# Bacteria
stopifnot(identical(sample_names(ps_bac_euxinic_pruned), sample_names(ps_bac_ra_euxinic_pruned)))
samplekey_B <- samplekey[match(colnames(otu_table(ps_bac_ra_euxinic_pruned)), samplekey$SampleID_bac), ]
stopifnot(!anyNA(samplekey_B$Type))
sample_names(ps_bac_ra_euxinic_pruned) <- samplekey_B$Type
sample_names(ps_bac_euxinic_pruned) <- samplekey_B$Type

# Eukaryotes
stopifnot(identical(sample_names(ps_euk_euxinic_pruned), sample_names(ps_euk_ra_euxinic_pruned)))
samplekey_E <- samplekey[match(colnames(otu_table(ps_euk_ra_euxinic_pruned)), samplekey$SampleID_euk), ]
stopifnot(!anyNA(samplekey_E$Type))
sample_names(ps_euk_ra_euxinic_pruned) <- samplekey_E$Type
sample_names(ps_euk_euxinic_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three euxinic count tables (bacteria, archaea, eukaryotes, in that
# order); bind_rows() lines the columns up by sample Type and fills samples that
# a domain lacks with NA
alldomains_df_euxinic <- list(ps_bac_euxinic_pruned, ps_arch_euxinic_pruned, ps_euk_euxinic_pruned) %>%
  lapply(function(domain_ps) data.frame(otu_table(domain_ps))) %>%
  bind_rows()
alldomains_df_euxinic
Change row names from “denovoXXX” to meaningful names
# Replace the row names of the stacked euxinic table with labels that carry the
# taxon ID plus a few taxonomic ranks.
# The taxon ID becomes a regular column so the taxonomy tables can be joined on it.
alldomains_df_euxinic <- cbind(ID = rownames(alldomains_df_euxinic), alldomains_df_euxinic)
# The domains were stacked as bacteria, archaea, eukaryotes, so each one is a
# consecutive run of rows. seq_len() keeps a slice empty when a domain has zero
# taxa, whereas 1:0 would count backwards and pick the wrong rows.
# start with only first rows, which are bacteria. make one column of meaningful labels
temp1 <- left_join(
  alldomains_df_euxinic[seq_len(nrow(otu_table(ps_bac_euxinic_pruned))), ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again; all_of() marks the names as an
# external character vector
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))
# next rows are the archaea
temp2 <- left_join(
  alldomains_df_euxinic[
    nrow(otu_table(ps_bac_euxinic_pruned)) + seq_len(nrow(otu_table(ps_arch_euxinic_pruned))),
  ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))
# last rows are eukarya; their taxonomy table keeps the ASV IDs as row names, so
# copy them into a column to join on
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(
  alldomains_df_euxinic[
    nrow(otu_table(ps_bac_euxinic_pruned)) + nrow(otu_table(ps_arch_euxinic_pruned)) +
      seq_len(nrow(otu_table(ps_euk_euxinic_pruned))),
  ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))
# combine back all 3 domains, with new names as row names in a dataframe
alldomains_df_euxinic <- rbind(temp1, temp2, temp3)
alldomains_df_euxinic <- data.frame(alldomains_df_euxinic)
rownames(alldomains_df_euxinic) <- alldomains_df_euxinic$New_ID
alldomains_df_euxinic <- select(alldomains_df_euxinic, -all_of(c("ID", "New_ID")))
alldomains_df_euxinic
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that hold a value for every taxon
alldomains_df_euxinic <- alldomains_df_euxinic[, !vapply(alldomains_df_euxinic, anyNA, logical(1)), drop = FALSE]
alldomains_df_euxinic
4 samples remain for correlation
This is largely based on the BVCN tutorials. NOTE: input for SparCC should be raw count data (after filtering out low-abundance ASVs). The function does a log-ratio transformation to account for compositionality.
# sparcc() receives the transpose of the count table, i.e. samples in rows and
# taxa in columns
sparcctable_alldomains <- sparcc(t(alldomains_df))
Put taxon names back into result tables
# Label rows and columns of the correlation and covariance matrices with the
# descriptive taxon labels (row names of alldomains_df_full)
dimnames(sparcctable_alldomains$Cor) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
dimnames(sparcctable_alldomains$Cov) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
# Check the labelling on the first two taxa
sparcctable_alldomains$Cor[1:2, 1:2]
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales 1.0000000
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A) 0.6531043
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A)
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales 0.6531043
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A) 1.0000000
Plot correlation
# Reorder the correlation matrix, keep its upper triangle, and melt it to long
# format (Var1, Var2, value) without the NA cells
plotableSparcc <- sparcctable_alldomains$Cor %>%
  reorder_cormat() %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit()
# Heat map of the pairwise SparCC correlations
Sparcc_plot <- ggplot(plotableSparcc, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Sparcc_plot
ggsave(
  "figures/sparcc_corr_alldomains.eps",
  Sparcc_plot,
  width = 35,
  height = 35,
  units = "in"
)
Calculate Sparcc p-values by bootstrapping- TAKES A LONG TIME
# Time the bootstrap: record proc.time() before and after the call
tp0 <- proc.time()
# 1000 bootstrap replicates (R = 1000) of SparCC on the samples-by-taxa count
# table, with ncpus = 2
out2 <- sparccboot(t(alldomains_df), R = 1000, ncpus = 2)
tp1 <- proc.time()
# user / system / elapsed seconds spent between the two time stamps
tp1 - tp0
user system elapsed
49520.017 3391.854 28336.411
The above took ~8 hours of wall-clock time (~14 hours of CPU time) to run 1000 iterations
Extract p-values
# Bootstrap-based correlations and p-values for every taxon pair
outP <- pval.sparccboot(out2)
head(data.frame(outP$cors, outP$pvals))
cors <- outP$cors
pvals <- outP$pvals

# Rebuild square, symmetric matrices from the per-pair vectors: fill the strict
# upper triangle, then add the transpose. The diagonal starts at 0.5 so that it
# becomes 1 after the addition.
sparCCpcors <- diag(0.5, nrow = nrow(sparcctable_alldomains$Cor), ncol = nrow(sparcctable_alldomains$Cor))
sparCCpcors[upper.tri(sparCCpcors, diag = FALSE)] <- cors
sparCCpcors <- sparCCpcors + t(sparCCpcors)

sparCCpval <- diag(0.5, nrow = nrow(sparcctable_alldomains$Cor), ncol = nrow(sparcctable_alldomains$Cor))
sparCCpval[upper.tri(sparCCpval, diag = FALSE)] <- pvals
sparCCpval <- sparCCpval + t(sparCCpval)

# Label both matrices with the descriptive taxon labels
dimnames(sparCCpcors) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
dimnames(sparCCpval) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
sparCCpcors[1:2, 1:2]
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales 1.0000000
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A) 0.6487105
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A)
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales 0.6487105
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A) 1.0000000
sparCCpval[1:2, 1:2]
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales 1
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A) 0
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A)
denovo231149 Proteobacteria Gammaproteobacteria Chromatiales 0
denovo348086 Deferribacteres Deferribacterales SAR406_clade(Marine_group_A) 1
Reorder for plotting
# Reorder the correlation and p-value matrices together so they stay aligned
reordered_all_sparcc <- reorder_cor_and_p(sparCCpcors, sparCCpval)
reordered_sparccCor <- reordered_all_sparcc$r
reordered_sparccP <- reordered_all_sparcc$p

# Upper triangle of each matrix in long format, one row per taxon pair
sparccCor_processed <- reordered_sparccCor %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit() %>%
  rename(cor = value)
sparccP_processed <- reordered_sparccP %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit() %>%
  rename(p = value)

# Join correlation and p-value per pair. Self correlations are not removed here
# (a filter(Var1 != Var2) step was left disabled).
SparccP <- left_join(sparccCor_processed, sparccP_processed, by = c("Var1", "Var2"))
# Benjamini-Hochberg false discovery rate to adjust for the multiple p-values
SparccP <- mutate(SparccP, fdr = p.adjust(p, method = "BH"))
And plot correlation with p-values. Circles mean that the relationship is significant at a false discovery rate (Benjamini-Hochberg) threshold of 0.01, based on bootstrapping
# Pairs with an FDR below this threshold get an open circle on the heat map
fdrThresh <- 0.01
sparccOkP <- filter(SparccP, fdr < fdrThresh)
SparccP_plot <- ggplot(SparccP, aes(x = Var2, y = Var1, fill = cor)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_point(data = sparccOkP, shape = 1)
SparccP_plot
ggsave(
  "figures/sparcc_corr_alldomains_w_pvals.eps",
  SparccP_plot,
  width = 20,
  height = 20,
  units = "in"
)
Save environment again
save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_sparcc_bootstrap.RData")
Or load if coming back
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_sparcc_bootstrap.RData")
Try the SpiecEasi method, which accounts for sparse data, as described in the SpiecEasi publication, the SpiecEasi GitHub page, and BVCN lesson 1.2. This reduces the clumps (e.g. sparse relationships that are secondary or tertiary, not direct relationships).
Make functions from tutorial
# Convert a SpiecEasi result into a long table of pairwise edge weights, in a
# layout comparable to the melted correlation tables used for the heat maps.
#
# se_out    Result object accepted by getOptCov() and getRefit() (presumably the
#           return value of spiec.easi() - confirm).
# sp.names  Character vector of taxon labels, one per row/column of the
#           covariance matrix and in the same order; labels must be unique
#           because pairs of them are used as row names.
#
# Returns a data frame with one row per ordered pair of labels: Var1, Var2 and
# Weight. Weight is the correlation for pairs kept in the refit network, 0 for
# pairs without an edge, and NA where Var1 comes after Var2 in sp.names.
convertSEToTable <- function(se_out,sp.names){
  # Turn the covariance at the selected lambda into a correlation matrix
  secor <- cov2cor(as.matrix(getOptCov(se_out))) # See spieceasi documentation for how to pull out weights for comparison
  # Keep only correlations of pairs present in the refit network, strictly above
  # the diagonal; the code below reads the summary as (row, column, value) triplets
  elist <- summary(triu(secor*getRefit(se_out), k=1))
  # Replace the numeric indices with taxon labels
  elist[,1] <- sp.names[elist[,1]]
  elist[,2] <- sp.names[elist[,2]]
  # Pair key "label1 label2", used below to look up rows of the full table
  elist[,4] <- paste(elist[,1],elist[,2])
  # Every ordered pair of labels, weight 0 unless an edge was found
  full_e <- expand.grid(sp.names,sp.names)
  rownames(full_e) <- paste(full_e[,1],full_e[,2])
  full_e[,"Weight"] <- 0
  full_e[elist[,4],"Weight"] <- elist[,3]
  # Same grid as indices; seq_along() stays empty for an empty sp.names, where
  # 1:length() would yield c(1, 0)
  x <- expand.grid(seq_along(sp.names), seq_along(sp.names))
  # Blank out the pairs whose first index is larger than the second
  full_e[x[,"Var1"]>x[,"Var2"],"Weight"] <- NA
  as.data.frame(full_e, stringsAsFactors = FALSE)
}
Follow the spieceasi documentation to find optimal parameters
Remove samples from the phyloseq objects that are not in all 3 domains and reorder samples so they are in same order in all 3 objects
# Samples present in the bacterial and archaeal relative-abundance objects,
# then narrowed to those also present in the eukaryotic one
bac_arch_common <- intersect(sample_names(ps_bac_ra_pruned), sample_names(ps_arch_ra_pruned))
all_common <- intersect(bac_arch_common, sample_names(ps_euk_ra_pruned))

# Restrict the count objects and the relative-abundance objects to the shared samples
ps_bac_pruned <- prune_samples(all_common, ps_bac_pruned)
ps_arch_pruned <- prune_samples(all_common, ps_arch_pruned)
ps_euk_pruned <- prune_samples(all_common, ps_euk_pruned)
ps_bac_ra_pruned <- prune_samples(all_common, ps_bac_ra_pruned)
ps_arch_ra_pruned <- prune_samples(all_common, ps_arch_ra_pruned)
ps_euk_ra_pruned <- prune_samples(all_common, ps_euk_ra_pruned)

# Put the archaeal and eukaryotic columns in the sample order of the bacterial
# COUNT object, because ps_bac_pruned (not ps_bac_ra_pruned) is what is passed
# to spiec.easi alongside them. This also matches the oxycline/anoxic/euxinic
# sections, which order by their bacterial count object.
# Assumes samples are columns of the OTU table (taxa_are_rows) -- TODO confirm.
otu_table(ps_arch_pruned) <- otu_table(ps_arch_pruned)[, sample_names(ps_bac_pruned)]
otu_table(ps_euk_pruned) <- otu_table(ps_euk_pruned)[, sample_names(ps_bac_pruned)]
sample_data(ps_bac_pruned)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_arch_pruned)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_euk_pruned)
Sample Data: [36 samples by 66 sample variables]:
#Run Spieceasi
# The seed only takes effect if it reaches pulsar, so merge it into
# pulsar.params together with rep.num; otherwise the StARS subsampling (and
# therefore the selected network) changes from run to run.
pargs <- list(seed = 10010)
se <- spiec.easi(
  list(ps_bac_pruned, ps_arch_pruned, ps_euk_pruned),
  method = "glasso",
  lambda.min.ratio = 5e-1,
  nlambda = 100,
  pulsar.params = c(pargs, list(rep.num = 50))
)
Applying data transformations...
Selecting model with pulsar using stars...
Fitting final estimate with glasso...
done
getStability(se)
[1] 0.04977937
the above takes a couple of minutes to run
# Flatten the whole-water-column SpiecEasi fit into a long table of pairwise
# weights, labelled with the column names of t(alldomains_df_full)
tab.se <- convertSEToTable(
  se,
  sp.names = colnames(t(alldomains_df_full))
)

# Heatmap of the weights
plot.se <- ggplot(tab.se, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se)

ggsave(
  "figures/spieceasi_alldomains.eps",
  plot = plot.se,
  width = 35,
  height = 35,
  units = "in"
)
Note- only the significant values above show up in the heatmap above (ie. there is no “p-value”)
# Oxycline samples found in both the bacterial and archaeal objects,
# then narrowed to those also in the eukaryotic object
bac_arch_common <- intersect(
  sample_names(ps_bac_oxycline_pruned),
  sample_names(ps_arch_oxycline_pruned)
)
all_common <- intersect(
  bac_arch_common,
  sample_names(ps_euk_oxycline_pruned)
)

# Keep only the shared samples in each domain
ps_bac_oxycline_pruned <- prune_samples(all_common, ps_bac_oxycline_pruned)
ps_arch_oxycline_pruned <- prune_samples(all_common, ps_arch_oxycline_pruned)
ps_euk_oxycline_pruned <- prune_samples(all_common, ps_euk_oxycline_pruned)

# Reorder archaeal and eukaryotic OTU-table columns to the bacterial sample order
otu_table(ps_arch_oxycline_pruned) <-
  otu_table(ps_arch_oxycline_pruned)[, sample_names(ps_bac_oxycline_pruned)]
otu_table(ps_euk_oxycline_pruned) <-
  otu_table(ps_euk_oxycline_pruned)[, sample_names(ps_bac_oxycline_pruned)]
sample_data(ps_bac_oxycline_pruned)
Sample Data: [21 samples by 66 sample variables]:
sample_data(ps_arch_oxycline_pruned)
Sample Data: [21 samples by 66 sample variables]:
sample_data(ps_euk_oxycline_pruned)
Sample Data: [21 samples by 66 sample variables]:
#Run Spieceasi
# The seed only takes effect if it reaches pulsar, so merge it into
# pulsar.params together with rep.num; otherwise the StARS subsampling (and
# therefore the selected network) changes from run to run.
pargs <- list(seed = 10010)
se.oxycline <- spiec.easi(
  list(ps_bac_oxycline_pruned, ps_arch_oxycline_pruned, ps_euk_oxycline_pruned),
  method = "glasso",
  lambda.min.ratio = 5e-1,
  nlambda = 100,
  pulsar.params = c(pargs, list(rep.num = 50))
)
Applying data transformations...
Selecting model with pulsar using stars...
Fitting final estimate with glasso...
done
getStability(se.oxycline)
[1] 0.04934384
the above takes a couple of minutes to run
# Flatten the oxycline SpiecEasi fit into a long table of pairwise weights,
# labelled with the column names of t(alldomains_df_oxycline)
tab.se.oxycline <- convertSEToTable(
  se.oxycline,
  sp.names = colnames(t(alldomains_df_oxycline))
)

# Heatmap of the weights
plot.se.oxycline <- ggplot(
  tab.se.oxycline,
  aes(x = Var1, y = Var2, fill = Weight)
) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se.oxycline)

ggsave(
  "figures/spieceasi_alldomains_oxycline.eps",
  plot = plot.se.oxycline,
  width = 35,
  height = 35,
  units = "in"
)
# Anoxic samples found in both the bacterial and archaeal objects,
# then narrowed to those also in the eukaryotic object
bac_arch_common <- intersect(
  sample_names(ps_bac_anoxic_pruned),
  sample_names(ps_arch_anoxic_pruned)
)
all_common <- intersect(
  bac_arch_common,
  sample_names(ps_euk_anoxic_pruned)
)

# Keep only the shared samples in each domain
ps_bac_anoxic_pruned <- prune_samples(all_common, ps_bac_anoxic_pruned)
ps_arch_anoxic_pruned <- prune_samples(all_common, ps_arch_anoxic_pruned)
ps_euk_anoxic_pruned <- prune_samples(all_common, ps_euk_anoxic_pruned)

# Reorder archaeal and eukaryotic OTU-table columns to the bacterial sample order
otu_table(ps_arch_anoxic_pruned) <-
  otu_table(ps_arch_anoxic_pruned)[, sample_names(ps_bac_anoxic_pruned)]
otu_table(ps_euk_anoxic_pruned) <-
  otu_table(ps_euk_anoxic_pruned)[, sample_names(ps_bac_anoxic_pruned)]
sample_data(ps_bac_anoxic_pruned)
Sample Data: [11 samples by 66 sample variables]:
sample_data(ps_arch_anoxic_pruned)
Sample Data: [11 samples by 66 sample variables]:
sample_data(ps_euk_anoxic_pruned)
Sample Data: [11 samples by 66 sample variables]:
#Run Spieceasi
# The seed only takes effect if it reaches pulsar, so merge it into
# pulsar.params together with rep.num; otherwise the StARS subsampling (and
# therefore the selected network) changes from run to run.
pargs <- list(seed = 10010)
se.anoxic <- spiec.easi(
  list(ps_bac_anoxic_pruned, ps_arch_anoxic_pruned, ps_euk_anoxic_pruned),
  method = "glasso",
  lambda.min.ratio = 5e-1,
  nlambda = 100,
  pulsar.params = c(pargs, list(rep.num = 50))
)
Applying data transformations...
Selecting model with pulsar using stars...
Fitting final estimate with glasso...
done
getStability(se.anoxic)
[1] 0.04597576
the above takes a couple of minutes to run
# Flatten the anoxic SpiecEasi fit into a long table of pairwise weights,
# labelled with the column names of t(alldomains_df_anoxic)
tab.se.anoxic <- convertSEToTable(
  se.anoxic,
  sp.names = colnames(t(alldomains_df_anoxic))
)

# Heatmap of the weights
plot.se.anoxic <- ggplot(
  tab.se.anoxic,
  aes(x = Var1, y = Var2, fill = Weight)
) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se.anoxic)

ggsave(
  "figures/spieceasi_alldomains_anoxic.eps",
  plot = plot.se.anoxic,
  width = 35,
  height = 35,
  units = "in"
)
# Euxinic samples found in both the bacterial and archaeal objects,
# then narrowed to those also in the eukaryotic object
bac_arch_common <- intersect(
  sample_names(ps_bac_euxinic_pruned),
  sample_names(ps_arch_euxinic_pruned)
)
all_common <- intersect(
  bac_arch_common,
  sample_names(ps_euk_euxinic_pruned)
)

# Keep only the shared samples in each domain
ps_bac_euxinic_pruned <- prune_samples(all_common, ps_bac_euxinic_pruned)
ps_arch_euxinic_pruned <- prune_samples(all_common, ps_arch_euxinic_pruned)
ps_euk_euxinic_pruned <- prune_samples(all_common, ps_euk_euxinic_pruned)

# Reorder archaeal and eukaryotic OTU-table columns to the bacterial sample order
otu_table(ps_arch_euxinic_pruned) <-
  otu_table(ps_arch_euxinic_pruned)[, sample_names(ps_bac_euxinic_pruned)]
otu_table(ps_euk_euxinic_pruned) <-
  otu_table(ps_euk_euxinic_pruned)[, sample_names(ps_bac_euxinic_pruned)]
sample_data(ps_bac_euxinic_pruned)
Sample Data: [4 samples by 66 sample variables]:
sample_data(ps_arch_euxinic_pruned)
Sample Data: [4 samples by 66 sample variables]:
sample_data(ps_euk_euxinic_pruned)
Sample Data: [4 samples by 66 sample variables]:
#Run Spieceasi
# The seed only takes effect if it reaches pulsar, so merge it into
# pulsar.params together with rep.num; otherwise the StARS subsampling (and
# therefore the selected network) changes from run to run.
# This run uses a smaller lambda.min.ratio and a denser lambda path than the
# other depth zones.
pargs <- list(seed = 10010)
se.euxinic <- spiec.easi(
  list(ps_bac_euxinic_pruned, ps_arch_euxinic_pruned, ps_euk_euxinic_pruned),
  method = "glasso",
  lambda.min.ratio = 5e-2,
  nlambda = 1000,
  pulsar.params = c(pargs, list(rep.num = 50))
)
Applying data transformations...
Selecting model with pulsar using stars...
Optimal lambda may be larger than the supplied valuesFitting final estimate with glasso...
done
getStability(se.euxinic)
[1] 0.06672217
I tried many parameters on the above but cannot get a satisfactory solution. There are just too few samples (4 after quality filtering) to do SpiecEasi on the euxinic depths only.
# Write every object in the current workspace to an .RData backup file
save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_spieceasi.RData")
Or load if coming back
# Restore the backed-up workspace; objects in the file replace same-named
# objects already present in the session
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_spieceasi.RData")
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Build using igraph
# Extract the adjacency matrix of the refit network from the whole-water-column SpiecEasi output
adj.mat <- getRefit(se)
# Count the matrix entries by value (matrix flattened to a numeric vector)
table(as.numeric(adj.mat))
0 1
83831 5570
# Weighted adjacency: correlations derived from the optimal covariance,
# multiplied element-wise by the refit (selected-edge) matrix
se.cor <- cov2cor(as.matrix(getOptCov(se)))
weighted.adj.mat <- se.cor * getRefit(se)

# Graph objects: one from the plain adjacency matrix, one from the weighted matrix
grph.unweighted <- adj2igraph(adj.mat)
grph <- adj2igraph(weighted.adj.mat)

# Label the vertices with the species names
V(grph)$name <- rownames(alldomains_df)
# V(grph)

# Edge width proportional to the absolute edge weight
E(grph)$width <- 10 * abs(E(grph)$weight)

# Node size proportional to degree (number of connections); the +1 avoids
# size-zero vertices and the division by 2 keeps the nodes small
V(grph)$size <- (degree(grph) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph)$color <- custombluegreen
# E(grph)$color[E(grph)$weight<0] <- customreddishpurple

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph <- delete.edges(grph,which(abs(E(grph)$weight)<weight_threshold))

# One color per vertex, by domain: bacteria, then archaea, then eukaryotes
dtype <- rep(
  c("red", "green", "blue"),
  times = c(ntaxa(ps_bac_pruned), ntaxa(ps_arch_pruned), ntaxa(ps_euk_pruned))
)
# Plot
# Compute the layout once and reuse it. layout_with_graphopt() was previously
# called separately for the preview and for the EPS, so the saved figure had a
# different vertex arrangement from the one inspected on screen.
grph.layout <- layout_with_graphopt(grph)
plot(grph,
     vertex.label = NA,
     layout = grph.layout,
     vertex.color = dtype)
title("SpiecEasi Network: All domains, Whole Water Column")
legend("topright", bty = "n",
       legend = c("Bacteria", "Archaea", "Eukarya"),
       fill = c("red", "green", "blue"), border = NA)

# Save plot: same drawing commands, sent to an EPS device
# NOTE(review): the ggsave() calls in this file write to "figures/" (lower
# case) while this writes to "Figures/" -- confirm the directory name if the
# script is run on a case-sensitive filesystem.
setEPS()
postscript(file = "Figures/3domains_alldepths_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph,
     vertex.label = NA,
     layout = grph.layout,
     vertex.color = dtype)
title("SpiecEasi Network: All domains, Whole Water Column")
legend("topright", bty = "n",
       legend = c("Bacteria", "Archaea", "Eukarya"),
       fill = c("red", "green", "blue"), border = NA)
dev.off()
quartz_off_screen
2
# Extract the adjacency matrix of the refit network from the oxycline SpiecEasi output
adj.mat <- getRefit(se.oxycline)
# Count the matrix entries by value (matrix flattened to a numeric vector)
table(as.numeric(adj.mat))
0 1
35079 1402
# Weighted adjacency: correlations derived from the optimal covariance,
# multiplied element-wise by the refit (selected-edge) matrix
se.cor <- cov2cor(as.matrix(getOptCov(se.oxycline)))
weighted.adj.mat <- se.cor * getRefit(se.oxycline)

# Graph objects: one from the plain adjacency matrix, one from the weighted matrix
grph.unweighted.oxycline <- adj2igraph(adj.mat)
grph.oxycline <- adj2igraph(weighted.adj.mat)

# Label the vertices with the species names
V(grph.oxycline)$name <- rownames(alldomains_df_oxycline)
# V(grph.oxycline)

# Edge width proportional to the absolute edge weight
E(grph.oxycline)$width <- 10 * abs(E(grph.oxycline)$weight)

# Node size proportional to degree (number of connections); the +1 avoids
# size-zero vertices and the division by 2 keeps the nodes small
V(grph.oxycline)$size <- (degree(grph.oxycline) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.oxycline)$color <- custombluegreen
# E(grph.oxycline)$color[E(grph.oxycline)$weight<0] <- customreddishpurple

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.oxycline <- delete.edges(grph.oxycline,which(abs(E(grph.oxycline)$weight)<weight_threshold))

# One color per vertex, by domain: bacteria, then archaea, then eukaryotes
dtype <- rep(
  c("red", "green", "blue"),
  times = c(
    ntaxa(ps_bac_oxycline_pruned),
    ntaxa(ps_arch_oxycline_pruned),
    ntaxa(ps_euk_oxycline_pruned)
  )
)
# Plot
# Compute the layout once and reuse it. layout_with_graphopt() was previously
# called separately for the preview and for the EPS, so the saved figure had a
# different vertex arrangement from the one inspected on screen.
grph.oxycline.layout <- layout_with_graphopt(grph.oxycline)
plot(grph.oxycline,
     vertex.label = NA,
     layout = grph.oxycline.layout,
     vertex.color = dtype)
title("SpiecEasi Network: All domains, Oxycline")
legend("topright", bty = "n",
       legend = c("Bacteria", "Archaea", "Eukarya"),
       fill = c("red", "green", "blue"), border = NA)

# Save plot: same drawing commands, sent to an EPS device
setEPS()
postscript(file = "Figures/3domains_oxycline_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.oxycline,
     vertex.label = NA,
     layout = grph.oxycline.layout,
     vertex.color = dtype)
title("SpiecEasi Network: All domains, Oxycline")
legend("topright", bty = "n",
       legend = c("Bacteria", "Archaea", "Eukarya"),
       fill = c("red", "green", "blue"), border = NA)
dev.off()
quartz_off_screen
2
# Extract the adjacency matrix of the refit network from the anoxic SpiecEasi output
adj.mat <- getRefit(se.anoxic)
# Count the matrix entries by value (matrix flattened to a numeric vector)
table(as.numeric(adj.mat))
0 1
7584 160
# Weighted adjacency: correlations derived from the optimal covariance,
# multiplied element-wise by the refit (selected-edge) matrix
se.cor <- cov2cor(as.matrix(getOptCov(se.anoxic)))
weighted.adj.mat <- se.cor * getRefit(se.anoxic)
#Convert to graph objects
grph.unweighted.anoxic <- adj2igraph(adj.mat)
grph.anoxic <- adj2igraph(weighted.adj.mat)
# Put back in species names.
# These must come from the ANOXIC table: using the oxycline table (as before)
# assigned names from a different taxon set and triggered the
# "number of items to replace is not a multiple of replacement length" warning,
# which mislabels the vertices used in later node-level summaries.
V(grph.anoxic)$name <- rownames(alldomains_df_anoxic)
number of items to replace is not a multiple of replacement length
# V(grph.anoxic)

# Edge width proportional to the absolute edge weight
E(grph.anoxic)$width <- 10 * abs(E(grph.anoxic)$weight)

# Node size proportional to degree (number of connections); the +1 avoids
# size-zero vertices and the division by 2 keeps the nodes small
V(grph.anoxic)$size <- (degree(grph.anoxic) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.anoxic)$color <- custombluegreen
# E(grph.anoxic)$color[E(grph.anoxic)$weight<0] <- customreddishpurple

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.anoxic <- delete.edges(grph.anoxic,which(abs(E(grph.anoxic)$weight)<weight_threshold))

# One color per vertex, by domain: bacteria, then archaea, then eukaryotes
dtype <- rep(
  c("red", "green", "blue"),
  times = c(
    ntaxa(ps_bac_anoxic_pruned),
    ntaxa(ps_arch_anoxic_pruned),
    ntaxa(ps_euk_anoxic_pruned)
  )
)
# Plot
# Compute the layout once and reuse it. layout_with_graphopt() was previously
# called separately for the preview and for the EPS, so the saved figure had a
# different vertex arrangement from the one inspected on screen.
grph.anoxic.layout <- layout_with_graphopt(grph.anoxic)
# The preview previously used the title "... Anoxic Layer" while the saved
# figure used "... Anoxic"; the preview now uses the saved figure's title so
# both drawings are identical and the EPS output is unchanged.
plot(grph.anoxic,
     vertex.label = NA,
     layout = grph.anoxic.layout,
     vertex.color = dtype)
title("SpiecEasi Network: All domains, Anoxic")
legend("topright", bty = "n",
       legend = c("Bacteria", "Archaea", "Eukarya"),
       fill = c("red", "green", "blue"), border = NA)

# Save plot: same drawing commands, sent to an EPS device
setEPS()
postscript(file = "Figures/3domains_anoxic_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.anoxic,
     vertex.label = NA,
     layout = grph.anoxic.layout,
     vertex.color = dtype)
title("SpiecEasi Network: All domains, Anoxic")
legend("topright", bty = "n",
       legend = c("Bacteria", "Archaea", "Eukarya"),
       fill = c("red", "green", "blue"), border = NA)
dev.off()
quartz_off_screen
2
All depths
# First change the weights of the edges (the strength of association) to
# absolute value. This won't work if negative associations are left with
# negative signs. Note that this overwrites the signed weights stored on grph.
E(grph)$weight <- abs(E(grph)$weight)

# Per-vertex measures
names <- V(grph)$name
de <- degree(grph)
st <- graph.strength(grph)
be <- betweenness(grph, normalized = TRUE)

# assemble dataset and match full taxonomy
fulldateset_node_measures <- data.frame(ID = names, degree = de, strength = st, betweenness = be)

# Rows are split into three consecutive runs: bacteria, archaea, eukaryotes
# (the same order used for the vertex colors and for the list given to
# spiec.easi). ntaxa() gives each run's length, and seq_len() keeps a run
# empty when a domain has no taxa, where an a:b range would count backwards.
n_bac <- ntaxa(ps_bac_pruned)
n_arch <- ntaxa(ps_arch_pruned)
n_euk <- ntaxa(ps_euk_pruned)
bac_rows <- seq_len(n_bac)
arch_rows <- n_bac + seq_len(n_arch)
euk_rows <- n_bac + n_arch + seq_len(n_euk)

# Put back bac taxonomy
temp1 <- left_join(fulldateset_node_measures[bac_rows, ], bac_taxonomy, by = c("ID" = "#OTU ID"))
# delete "taxonomy-9" and "Refined taxonomy" columns
temp1 <- select(temp1, -"taxonomy-9", -"Refined taxonomy")

# Archaeal and eukaryotic taxonomy
temp2 <- left_join(fulldateset_node_measures[arch_rows, ], arch_taxonomy, by = c("ID" = "#OTU ID"))
temp3 <- left_join(fulldateset_node_measures[euk_rows, ], euk_taxonomy, by = c("ID" = "#ASV ID"))

# Rename col names to match those from Bac and Arch
temp3 <- temp3 %>%
  rename(
    "taxonomy-1" = Kingdom,
    "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division,
    "taxonomy-4" = Class,
    "taxonomy-5" = Order,
    "taxonomy-6" = Family,
    "taxonomy-7" = Genus,
    "taxonomy-8" = Species
  )

# combine back all 3 domains into one data frame
# (rbind requires the three tables to have identical column names)
fulldateset_node_measures <- rbind(temp1, temp2, temp3)
fulldateset_node_measures
Plot betweenness vs. degree centrality (keystone species analysis)
# Betweenness against degree, one point per taxon, colored by the
# "taxonomy-2" column. The column is referenced by its (backtick-quoted) name
# inside aes() rather than as `df$col`, so the mapping is evaluated against the
# data given to ggplot() and the legend is titled "taxonomy-2" instead of the
# full `fulldateset_node_measures$"taxonomy-2"` expression.
ggplot(fulldateset_node_measures, aes(x = degree, y = betweenness, color = `taxonomy-2`)) +
  geom_point()